In [ ]:

    
%matplotlib inline

import numpy as np
from numpy.fft import fft2, ifft2, fftshift, ifftshift

from scipy.stats import multivariate_normal

import matplotlib.pyplot as plt

import menpo.io as mio
from menpo.image import Image
from menpo.feature import hog, no_op
from menpo.shape import PointCloud
from menpo.visualize import visualize_images

from templatetracker.correlationfilter.correlationfilter import (
    learn_mosse, learn_mccf, learn_deep_cf)
from templatetracker.correlationfilter.utils import (
    build_grid, normalizenorm_vec, fast2dconv, crop)



In [ ]:

    
def greyscale(i):
    """Return image ``i`` converted to greyscale using the 'average' mode."""
    grey_image = i.as_greyscale('average')
    return grey_image

def greyscale_hog(i):
    """Return the HOG features computed on the greyscale version of ``i``."""
    grey_image = greyscale(i)
    return hog(grey_image)

def combine(i):
    """Build an Image stacking raw, greyscale and greyscale-HOG pixels of ``i``.

    The three pixel arrays are joined with ``np.concatenate`` along its
    default axis (axis 0) -- presumably the channel axis; confirm against
    menpo's pixel layout.
    """
    stacked_pixels = np.concatenate(
        (i.pixels, greyscale(i).pixels, greyscale_hog(i).pixels))
    return Image(stacked_pixels)

Kernelized Correlation Filters

Load and manipulate data

Load landmarked facial images.



In [ ]:

    
# Import at most 300 LFPW training images; each one is cropped around its
# landmarks and rescaled before being collected.
# NOTE(review): absolute, machine-specific path -- adjust to the local dataset.
lfpw_trainset_glob = '/Users/joan/PhD/DataBases/faces/lfpw/trainset/*'
lfpw_importer = mio.import_images(lfpw_trainset_glob, verbose=True,
                                  max_images=300)

images = []
for i in lfpw_importer:
    # crop happens in place, whereas the rescale returns a new image
    i.crop_to_landmarks_proportion_inplace(0.5)
    i = i.rescale_landmarks_to_diagonal_range(100)
    images.append(i)



In [ ]:

    
# Display the loaded (cropped and rescaled) images with menpo's visualize_images.
visualize_images(images)

Extract 101 x 101 patches around landmark number 45 (the corner of the left eye) from the previous images. Note that any other landmark could be chosen.



In [ ]:

    
# size of the patches extracted around the landmark
patch_shape = np.asarray((101, 101))
# index of the landmark the patches are centred on
lm_number = 45
# feature function applied to every patch
features = greyscale  # alternatives: no_op, greyscale, greyscale_hog

image_patches = []
pixel_patches = []
for i in images:
    # patches are extracted around every landmark; keep only the chosen one
    landmark_patch = i.extract_patches_around_landmarks(
        patch_size=patch_shape)[lm_number]
    image_patches.append(landmark_patch)
    feature_patches = features(landmark_patch)
    pixel_patches.append(feature_patches.pixels)



In [ ]:

    
# Display the image patches extracted around the chosen landmark.
visualize_images(image_patches)

Store patches as numpy array.



In [ ]:

    
# Stack the per-image feature pixel arrays into a single array whose first
# axis indexes the images (assumes every patch has the same shape).
X = np.asarray(pixel_patches)

These are the patches that we will use in order to define and test our Kernelized Correlation Filters.

Define the desired response for each patch. Note that, because all patches are centred about the same landmark, they share the same desired response, i.e. a two-dimensional Gaussian response centred at the middle of the patch.



In [ ]:

    
# covariance of the Gaussian used as desired response
cov = 3

# define Gaussian response: a zero-mean 2D Gaussian evaluated on a 31 x 31 grid
# NOTE(review): the response is 31 x 31 while the training patches are
# 101 x 101 -- confirm the learners accept a response smaller than the patch.
response_shape = (31, 31)
mvn = multivariate_normal(mean=np.zeros(2), cov=cov)
grid = build_grid(response_shape)
# prepend a singleton axis to the evaluated density
response = mvn.pdf(grid)
y = response[None]



In [ ]:

    
# Show the desired response; y[0] drops the leading singleton axis of y.
plt.title('Desired response')
plt.imshow(y[0])

Learn Correlation Filter (CF)

At this point we will use the first image patch as the template from which to learn a CF. Note that we could have chosen any other image patch.



In [ ]:

    
# img_number = 0
# x = X[img_number]

Apart from the kernel correlation specific parameters, we need to make some choices regarding the overall learning procedure.



In [ ]:

    
# whether to normalize the image
normalize = True
# whether to mask the images with a cosine mask
mask = True
# regularization parameter
l = 0.1
# type of filter
filter_type = 'deep_mosse'
# boundary padding
boundary = 'symmetric'

# 1D Hanning windows, one per patch dimension; the product of the column
# vector c1 with the row vector c2 yields the 2D cosine mask
c1 = np.hanning(patch_shape[0])
c2 = np.hanning(patch_shape[1])
if mask:
    cosine_mask = c1[..., None].dot(c2[None, ...])
else:
    cosine_mask = None

We are now ready to learn a CF from the (optionally normalized and masked) image patches.



In [ ]:

    
# Pre-process every training patch: optional normalization followed by
# optional cosine masking.
X_ = np.empty_like(X)
for j, x in enumerate(X):
    x_ = normalizenorm_vec(x) if normalize else x
    x_ = cosine_mask * x_ if mask else x_
    X_[j] = x_

# Select the learner by *value*. Strings must be compared with `==`: `is`
# tests object identity, which only matches when both strings happen to be
# interned and triggers a SyntaxWarning on Python >= 3.8.
if filter_type == 'mosse':
    cf, _, _ = learn_mosse(X_, y, l=l, boundary=boundary)
elif filter_type == 'mccf':
    cf, _, _ = learn_mccf(X_, y, l=l, boundary=boundary)
elif filter_type == 'deep_mosse':
    cf, _, _ = learn_deep_cf(X_, y, learn_cf=learn_mosse, n_levels=1, l=l, boundary=boundary)
elif filter_type == 'deep_mccf':
    cf, _, _ = learn_deep_cf(X_, y, learn_cf=learn_mccf, n_levels=3, l=l, boundary=boundary)
else:
    # fail loudly instead of leaving `cf` undefined (or stale from an
    # earlier run of this cell)
    raise ValueError(
        "Unknown filter_type {!r}; expected one of 'mosse', 'mccf', "
        "'deep_mosse' or 'deep_mccf'.".format(filter_type))



In [ ]:

    
# only up to the first 5 channels are shown
n_channels = np.minimum(5, cf.shape[0])
fig_size = (3*n_channels, 3*n_channels)

# one row of subplots with the filter channels as learned
fig = plt.figure()
fig.set_size_inches(fig_size)
for position, channel in enumerate(cf[:n_channels], start=1):
    plt.subplot(1, n_channels, position)
    plt.title('CF in spatial domain')
    plt.imshow(channel)

# one row of subplots with the centred magnitude spectrum of each channel
fig = plt.figure()
fig.set_size_inches(fig_size)
for position, channel in enumerate(cf[:n_channels], start=1):
    plt.subplot(1, n_channels, position)
    plt.title('CF in frequency domain')
    spectrum = fftshift(fft2(channel))
    plt.imshow(np.abs(spectrum))

Test KCF

In order to test the correctness of the learned KCF we will extract 81 x 81 patches centred around a different landmark, number 42. Note that we now expect to get responses with peaks shifted away from the centre of the patch, hence correctly localizing the eye corner for which the KCF was learned.



In [ ]:

    
# index of the landmark the test patches are centred on
lm_test = 42

# size of the test patches
patch_shape2 = (81, 81)

image_prime_patches = []
pixel_prime_patches = []
for i in images:
    # patches are extracted around every landmark; keep only the test one
    test_patch = i.extract_patches_around_landmarks(
        patch_size=patch_shape2)[lm_test]
    image_prime_patches.append(test_patch)
    feature_prime_patches = features(test_patch)
    pixel_prime_patches.append(feature_prime_patches.pixels)



In [ ]:

    
# Display the test patches extracted around the test landmark.
visualize_images(image_prime_patches)

Store patches as numpy array.



In [ ]:

    
# Stack the per-image test feature pixel arrays into a single array whose
# first axis indexes the images (assumes every patch has the same shape).
X_prime = np.asarray(pixel_prime_patches)



In [ ]:

    
# Correlate every test patch with the learned filter.
# The original loop also recomputed a normalized copy of the filter on every
# iteration but never used it; the response has always been computed with the
# filter exactly as learned, so that dead, loop-invariant work is dropped.
rs = []
for z in X_prime:
    z_ = normalizenorm_vec(z) if normalize else z
    # compute correlation response: sum the per-channel responses and
    # prepend a singleton axis
    r = np.sum(fast2dconv(z_, cf, boundary=boundary), axis=0)[None]
    rs.append(r)



In [ ]:

    
# only up to the first 5 images are shown
n_images = np.minimum(5, len(X_prime))
fig_size = (3*n_images, 3*n_images)

# one row of subplots with the correlation responses
fig = plt.figure()
fig.set_size_inches(fig_size)
for position, response in enumerate(rs[:n_images], start=1):
    plt.subplot(1, n_images, position)
    plt.title('response')
    plt.imshow(response[0])

# one row of subplots with the test patches and the detected peak
fig = plt.figure()
fig.set_size_inches(fig_size)
shown_pairs = zip(rs[:n_images], image_prime_patches[:n_images])
for position, (response, patch) in enumerate(shown_pairs, start=1):
    plt.subplot(1, n_images, position)
    plt.title('original image')
    # location of the maximum response; the first index belongs to the
    # leading singleton axis of the response and is discarded
    peak_index = np.unravel_index(response.argmax(), response.shape)
    peak = np.asarray(peak_index)[1:]
    # attach the peak to the patch as a one-point landmark group
    patch.landmarks['peak'] = PointCloud(peak[None, ...])
    patch.view_landmarks(marker_face_colour='r', figure_size=fig_size)